Notebook following this tutorial: https://blogs.rstudio.com/tensorflow/posts/2018-06-25-sunspots-lstm/
knitr::opts_chunk$set(eval = T, comment = "", results = "hold", message = F, tidy = T, dpi = 360)
library(tidyverse)
library(glue)
library(timetk)
library(tidyquant)
library(tibbletime)
library(cowplot)
library(recipes)
library(rsample)
library(yardstick)
library(keras)
library(tfruns)
sun_spots <- datasets::sunspot.month %>% tk_tbl() %>% mutate(index = as_date(index)) %>%
as_tbl_time(index = index)
sun_spots
# A time tibble: 3,177 x 2
# Index: index
index value
<date> <dbl>
1 1749-01-01 58
2 1749-02-01 62.6
3 1749-03-01 70
4 1749-04-01 55.7
5 1749-05-01 85
6 1749-06-01 83.5
7 1749-07-01 94.8
8 1749-08-01 66.3
9 1749-09-01 75.9
10 1749-10-01 75.5
# ... with 3,167 more rows
p1 <- sun_spots %>% ggplot(aes(index, value)) + geom_point(color = palette_light()[[1]],
alpha = 0.5) + theme_tq() + labs(title = "From 1749 to 2013 (full data)")
p2 <- sun_spots %>% filter_time("start" ~ "1800") %>% ggplot(aes(index, value)) +
geom_line(color = palette_light()[[1]], alpha = 0.5) + geom_point(color = palette_light()[[1]]) +
geom_smooth(method = "loess", span = 0.2, se = F) + theme_tq() + labs(title = "1749 to 1759 (Zoomed In To Show Changes over the Year)",
caption = "datasets::sunspot.month")
p_title <- ggdraw() + draw_label("Sunspots", size = 18, fontface = "bold", colour = palette_light()[[1]]) # Not color
plot_grid(p_title, p1, p2, ncol = 1, rel_heights = c(0.1, 1, 1))
periods_train <- 12 * 100
periods_test <- 12 * 50
skip_span <- 12 * 22
rolling_origin_resamples <- rolling_origin(sun_spots, initial = periods_train,
assess = periods_test, cumulative = F, skip = skip_span)
rolling_origin_resamples
splits id
plot_split <- function(split, expand_y_axis = T, alpha = 1, size = 1, base_size = 14) {
train_tbl <- training(split) %>% add_column(key = "training")
test_tbl <- testing(split) %>% add_column(key = "testing")
data_manipulated <- bind_rows(train_tbl, test_tbl) %>% as_tbl_time(index = index) %>%
mutate(key = fct_relevel(key, "training", "testing"))
train_time_summary <- train_tbl %>% tk_index() %>% tk_get_timeseries_summary()
test_time_summary <- test_tbl %>% tk_index() %>% tk_get_timeseries_summary()
g <- data_manipulated %>% ggplot() + aes(x = index, y = value, color = key) +
geom_line(size = size, alpha = alpha) + theme_tq(base_size = base_size) +
scale_color_tq() + labs(title = glue("Split: {split$id}"), subtitle = glue("{train_time_summary$start} to ",
"{test_time_summary$end}"), y = "", x = "") + theme(legend.position = "none")
if (expand_y_axis) {
sun_spots_time_summary <- sun_spots %>% tk_index() %>% tk_get_timeseries_summary()
g <- g + scale_x_date(limits = c(sun_spots_time_summary$start, sun_spots_time_summary$end))
}
g
}
rolling_origin_resamples$splits[[1]] %>% plot_split(expand_y_axis = T) + theme(legend.position = "bottom")
# Plotting function that scales to all splits
plot_sampling_plan <- function(sampling_tbl, expand_y_axis = TRUE, ncol = 3,
alpha = 1, size = 1, base_size = 14, title = "Sampling Plan") {
# Map plot_split() to sampling_tbl
sampling_tbl_with_plots <- sampling_tbl %>% mutate(gg_plots = map(splits,
plot_split, expand_y_axis = expand_y_axis, alpha = alpha, base_size = base_size))
# Make plots with cowplot
plot_list <- sampling_tbl_with_plots$gg_plots
p_temp <- plot_list[[1]] + theme(legend.position = "bottom")
legend <- get_legend(p_temp)
p_body <- plot_grid(plotlist = plot_list, ncol = ncol)
p_title <- ggdraw() + draw_label(title, size = 14, fontface = "bold", colour = palette_light()[[1]])
g <- plot_grid(p_title, p_body, legend, ncol = 1, rel_heights = c(0.05,
1, 0.05))
g
}
rolling_origin_resamples %>% plot_sampling_plan(expand_y_axis = T, ncol = 3,
alpha = 1, size = 1, base_size = 10, title = "Backtesting Strategy: Rolling Origin Sampling Plan")
rolling_origin_resamples %>% plot_sampling_plan(expand_y_axis = F, ncol = 3,
alpha = 1, size = 1, base_size = 10, title = "Backtesting Strategy: Zoomed In")
Take one split only
example_split <- rolling_origin_resamples$splits[[6]]
example_split_id <- rolling_origin_resamples$id[[6]]
plot_split(example_split, expand_y_axis = F, size = 0.5) + theme(legend.position = "bottom") +
ggtitle(glue("Split: {example_split_id}"))
df_trn <- analysis(example_split)[1:800, , drop = F]
df_val <- analysis(example_split)[801:1200, , drop = F]
df_tst <- assessment(example_split)
df <- bind_rows(df_trn %>% add_column(key = "training"), df_val %>% add_column(key = "validation"),
df_tst %>% add_column(key = "testing")) %>% as_tbl_time(index = index)
df
# A time tibble: 1,800 x 3
# Index: index
index value key
<date> <dbl> <chr>
1 1859-06-01 87.1 training
2 1859-07-01 95.2 training
3 1859-08-01 107. training
4 1859-09-01 106. training
5 1859-10-01 115. training
6 1859-11-01 97.2 training
7 1859-12-01 81 training
8 1860-01-01 82.4 training
9 1860-02-01 88.3 training
10 1860-03-01 98.9 training
# ... with 1,790 more rows
rec_obj <- recipe(value ~ ., df) %>% step_sqrt(value) %>% step_center(value) %>%
step_scale(value) %>% prep()